## ---- Auxiliary data (MICROM etc) ----
## Inputs: the two .RData files are expected to supply `soep_imp` and
## `plz5_f_und_b`, which the code below uses without creating them
## (TODO confirm); the MICROM panel is read from a Stata file.
load(file = "dat/proc-data/soep_imp.RData")
load(file = "dat/proc-data/plz5_f_und_b.RData")
soep_microm <- read.dta(file = "dat/proc-data/microm_2005_2018.dta")

## ---- Post-imputation variable generation (Part 1) ----
## For every imputed data set: merge the auxiliary MICROM and PLZ-level data,
## then derive the area type, economic risk, vote recall, residence spell,
## main-breadwinner, dummy and within-person (mean / deviation) variables.
for (m in seq_along(soep_imp$imputations)) {
  ## Merge auxiliary data.
  ## - plz is turned into a string and 4-character codes get a leading "0".
  ## - MICROM is joined by household and year; its regional identifier
  ##   columns are dropped first so they do not clash with existing columns.
  ## - The PLZ-level table gets, per plz, the 2005 value of rent, cmr_p50 and
  ##   cmr_arm (NA if the plz has no 2005 row) plus relative (`_pchg_2005`)
  ##   and absolute (`_achg_2005`) changes against that 2005 value.
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    mutate(plz = as.character(plz)) %>%
    mutate(plz = ifelse(nchar(plz) == 4, paste0("0", plz), plz)) %>%
    left_join(
      soep_microm %>%
        dplyr::select(-bula,-gkz,-kkz,-plz,-kr_kkz_rek,-gk_id,-pl_id,-gtyp),
      by = c("hh_id", "year")
    ) %>%
    left_join(
      plz5_f_und_b %>%
        as.data.frame() %>%
        dplyr::select(-AGS05max, -geometry) %>%
        distinct() %>%
        group_by(plz) %>%
        mutate_at(
          .vars = vars(rent, cmr_p50, cmr_arm),
          # Scalar ifelse(): yields the first 2005 value of the plz, or NA.
          .funs = list(`2005` = ~ ifelse(length(.[year == 2005]) == 0L,
                                         NA,
                                         .[year == 2005]))
        ) %>%
        mutate(
          rent_pchg_2005 = rent / rent_2005 - 1,
          rent_achg_2005 = rent - rent_2005,
          cmr_p50_pchg_2005 = cmr_p50 / cmr_p50_2005 - 1,
          cmr_p50_achg_2005 = cmr_p50 - cmr_p50_2005,
          cmr_arm_pchg_2005 = cmr_arm / cmr_arm_2005 - 1,
          cmr_arm_achg_2005 = cmr_arm - cmr_arm_2005
        ) %>%
        ungroup(),
      by = c("plz", "year")
    ) %>%
    # Collapse the 17 gtyp labels into urban / suburban / rural; any other
    # value (including NA) becomes NA.
    mutate(
      gtyp3 = case_when(
        gtyp %in% c("[1] Kernstaedte>==gr.Verdraum") ~ "urban",
        gtyp %in% c(
          "[2] Kernstaedte<==gr.Verdraum",
          "[9] Kernstaedte<==Verdansatz"
        ) ~ "urban",
        gtyp %in% c(
          "[3] Mi-zent.=hochverd.=gr.Verdraum",
          "[4] so.Gem.=hochverd.=gr.Verdraum",
          "[5] Mi-zent.=verd.=gr.Verdraum",
          "[6] so.Gem.=verd.=gr.Verdraum",
          "[10] Mi-zent.=verd.=Verdansatz",
          "[11] so.Gem.=verd.=Verdansatz"
        ) ~ "suburban",
        gtyp %in% c(
          "[7] Mi-zent.=laendl=gr.Verdraum",
          "[8] so.Gem.=laendl=gr.Verdraum",
          "[12] Mi-zent.=laendl=Verdansatz",
          "[13] so.Gem.=laendl=Verdansatz"
        ) ~ "rural",
        gtyp %in% c(
          "[14] Mi-zent.=verd.=laendl.",
          "[15] so.Gem.=verd.=laendl.",
          "[16] Mi-zent.=laendl=laendl.",
          "[17] so.Gem.=laendl=laendl."
        ) ~ "rural",
        TRUE ~ NA_character_
      )
    )
  
  ## ---- Economic Risk ----
  ## risk = weighted mean of the unemployment dummy within cells defined by
  ## myclass4_r, age5, fem, east and year; set to NA for everyone whose
  ## lm_part is not Active, Unemployed or Atypical.
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    mutate(unemp = ifelse(lm_part == "Unemployed", 1, 0)) %>%
    group_by(myclass4_r, age5, fem, east, year) %>%
    mutate(risk = weighted.mean(unemp, w = weight)) %>%
    ungroup() %>%
    mutate(risk = ifelse(lm_part %in% c("Active", "Unemployed", "Atypical"), risk, NA))
  
  ## ---- Voting ----
  ## Person-constant vote recall taken from the 2014 and 2018 rows.
  ## `[1L]` returns the first matching value and NA_character_ when the person
  ## has no row for that year. (The previous is.null() guard could never be
  ## TRUE for a subset vector; this form yields the same values explicitly.)
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    group_by(id) %>%
    mutate(
      vote2013 = as.character(vote[year == 2014])[1L],
      vote2017 = as.character(vote[year == 2018])[1L]
    ) %>%
    ungroup()
  
  ## Since when in same PLZ5/AGS5 area
  ## chg_*_full is recoded to 0/1 (NA counts as 0) and cumulated within person
  ## over time, giving a running count of area changes. Within each person and
  ## count value: count 0 uses the earliest hh_at_current_address, later
  ## counts use the first year observed with that count.
  ## NOTE(review): hh_in_current_plz is initialised here but never updated,
  ## while the ZIP-based result is stored in hh_in_current_zip -- confirm which
  ## name downstream code expects.
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    mutate(
      hh_in_current_plz = hh_at_current_address,
      hh_in_current_kkz = hh_at_current_address,
      chg_kkz_full = ifelse(chg_kkz_full == "[1] Yes", 1, 0),
      chg_zip_full = ifelse(chg_zip_full == "[1] Yes", 1, 0),
      chg_kkz_full = ifelse(is.na(chg_kkz_full), 0, chg_kkz_full),
      chg_zip_full = ifelse(is.na(chg_zip_full), 0, chg_zip_full)
    ) %>%
    arrange(id, year) %>%
    group_by(id) %>%
    mutate_at(.vars = vars(chg_zip_full, chg_kkz_full),
              .funs = cumsum) %>%
    group_by(id, chg_zip_full) %>%
    mutate(hh_in_current_zip = ifelse(chg_zip_full == 0,
                                      min(hh_at_current_address[chg_zip_full == 0]),
                                      min(year))) %>%
    group_by(id, chg_kkz_full) %>%
    mutate(hh_in_current_kkz = ifelse(chg_kkz_full == 0,
                                      min(hh_at_current_address[chg_kkz_full == 0]),
                                      min(year))) %>%
    ungroup()
  
  ## Main breadwinner within households (by year)
  ## TRUE for the member(s) whose prop_personal_hinc equals the household-year
  ## maximum (ties flag several members). Uses `==`: the former `=` inside the
  ## parentheses was an assignment and returned the maximum itself.
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    group_by(hh_id, year) %>%
    mutate(main_breadwinner = (prop_personal_hinc == max(prop_personal_hinc))) %>%
    ungroup()
  
  ## Dummy generation and within-transformation
  ## Appends dummy columns for lm_part and hh_comp, then adds for each listed
  ## variable the person mean (`_umn`) and the deviation from it (`_cwu`).
  ## mean() is called without na.rm, so one missing value within a person
  ## makes both derived columns NA for that person.
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    bind_cols(soep_imp$imputations[[m]] %>%
                to_dummy(lm_part, suffix = "label")) %>%
    bind_cols(soep_imp$imputations[[m]] %>%
                to_dummy(hh_comp, suffix = "label")) %>%
    group_by(id) %>%
    mutate_at(
      .vars = vars(
        owner,
        mover,
        east,
        starts_with("lm_part_"),
        starts_with("hh_comp_"),
        hh_mmb,
        hh_prop_ecact,
        prop_personal_hinc,
        log_hinc_eq,
        cold_rent_sqm,
        cold_rent_load,
        home_size,
        asset_ov_ttl_t
      ),
      .funs = list(umn = ~ mean(.),
                   cwu = ~ . - mean(.))
    ) %>%
    ungroup()
}


## ---- Post-imputation variable generation (Part 2) ----
## Append dummy columns for edu5 and myclass4_r, then add for each listed
## variable the person mean (`_umn`) and the deviation from it (`_cwu`).
## Missing values are ignored when averaging; a person with no observed value
## gets NaN from mean(). `TRUE` is spelled out because `T` can be reassigned.
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    bind_cols(soep_imp$imputations[[m]] %>%
                to_dummy(edu5, suffix = "label")) %>%
    bind_cols(soep_imp$imputations[[m]] %>%
                to_dummy(myclass4_r, suffix = "label")) %>%
    group_by(id) %>%
    mutate_at(
      .vars = vars(
        starts_with("edu5_"),
        starts_with("myclass4_r_"),
        risk,
        rent,
        cmr_arm
      ),
      .funs = list(
        umn = ~ mean(., na.rm = TRUE),
        cwu = ~ . - mean(., na.rm = TRUE)
      )
    ) %>%
    ungroup()
}
## Recompute person mean (`_umn`) and deviation (`_cwu`) for the cold-rent
## variables, this time ignoring missing values. The output names equal those
## produced in Part 1 (which used mean() without na.rm), so those columns are
## overwritten. `TRUE` is spelled out because `T` can be reassigned.
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    group_by(id) %>%
    mutate_at(
      .vars = vars(cold_rent_sqm,
                   cold_rent_load),
      .funs = list(
        umn = ~ mean(., na.rm = TRUE),
        cwu = ~ . - mean(., na.rm = TRUE)
      )
    ) %>%
    ungroup()
}
## Remove every column whose name contains a doubled within-transformation
## suffix, i.e. a mean/deviation computed from a mean/deviation column.
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- dplyr::select(
    soep_imp$imputations[[m]],
    -contains(c("_umn_umn", "_umn_cwu", "_cwu_umn", "_cwu_cwu"))
  )
}
## Replace NaN by NA in every column that contains at least one NaN.
## vapply() guarantees a logical vector (one flag per column) even for an
## input without columns, where sapply() would return an empty list.
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    mutate_at(
      .vars = vars(all_of(names(soep_imp$imputations[[m]])[vapply(
        soep_imp$imputations[[m]],
        function(x) any(is.nan(x)),
        logical(1)
      )])),
      .funs = ~ ifelse(is.nan(.), NA, .)
    )
}
## Person mean (`age_umn`) and deviation from it (`age_cwu`) for age, ignoring
## missing values. `TRUE` is spelled out because `T` can be reassigned.
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- soep_imp$imputations[[m]] %>%
    group_by(id) %>%
    mutate_at(.vars = vars(age),
              .funs = list(
                umn = ~ mean(., na.rm = TRUE),
                cwu = ~ . - mean(., na.rm = TRUE)
              )) %>%
    ungroup()
}

## ---- Post-imputation variable generation (Part 3) ----
## 0/1 indicators for each partyid value, plus AfD / Green vote indicators
## that are filled only in the 2014 rows (from vote2013) and the 2018 rows
## (from vote2017); all other years are NA.
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- mutate(
    soep_imp$imputations[[m]],
    # Party identification
    afd = as.numeric(partyid == "AfD"),
    cdu = as.numeric(partyid == "CDU/CSU"),
    fdp = as.numeric(partyid == "FDP"),
    gre = as.numeric(partyid == "Green"),
    lef = as.numeric(partyid == "Left"),
    non = as.numeric(partyid == "None"),
    oth = as.numeric(partyid == "Others"),
    spd = as.numeric(partyid == "SPD"),
    # Vote recall, tied to the survey year in which it is used
    vote_afd = case_when(
      year == 2014 ~ as.numeric(vote2013 == "AfD"),
      year == 2018 ~ as.numeric(vote2017 == "AfD"),
      TRUE ~ NA_real_
    ),
    vote_gre = case_when(
      year == 2014 ~ as.numeric(vote2013 == "Green"),
      year == 2018 ~ as.numeric(vote2017 == "Green"),
      TRUE ~ NA_real_
    )
  )
}
## first_year: earliest year in which each person (id) appears in the data.
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- ungroup(
    mutate(
      group_by(soep_imp$imputations[[m]], id),
      first_year = min(year)
    )
  )
}
## year_fac: factor copy of year.
for (m in seq_along(soep_imp$imputations)) {
  soep_imp$imputations[[m]] <- mutate(
    soep_imp$imputations[[m]],
    year_fac = as.factor(year)
  )
}

## Save: writes the augmented object back to the .RData path it was loaded
## from at the top of the script, replacing that file.
save(list = "soep_imp", file = "dat/proc-data/soep_imp.RData")